{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "pd.set_option(\"display.max_rows\",30)\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class dataset:\n",
    "    kdd_train_2labels = pd.read_pickle(\"dataset/kdd_train_2labels_20percent.pkl\")\n",
    "    kdd_train_2labels_y = pd.read_pickle(\"dataset/kdd_train_2labels_y_20percent.pkl\")\n",
    "    \n",
    "    kdd_test_2labels = pd.read_pickle(\"dataset/kdd_test_2labels_20percent.pkl\")\n",
    "    kdd_test_2labels_y = pd.read_pickle(\"dataset/kdd_test_2labels_y_20percent.pkl\")\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "le_2labels = LabelEncoder()\n",
    "dataset.y_train_2labels = le_2labels.fit_transform(dataset.kdd_train_2labels_y)\n",
    "dataset.y_test_2labels = le_2labels.transform(dataset.kdd_test_2labels_y)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from itertools import product\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "class preprocessing:\n",
    "    x_train = dataset.kdd_train_2labels.iloc[:,:-2].values\n",
    "    y_train = np.array(dataset.y_train_2labels)\n",
    "\n",
    "    x_test, y_test = (dataset.kdd_test_2labels.iloc[:,:-2].values, \n",
    "                      np.array(dataset.y_test_2labels))\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from collections import namedtuple\n",
    "import numpy as np\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout\n",
    "from keras.layers.normalization import BatchNormalization\n",
    "from keras import optimizers\n",
    "from keras import regularizers\n",
    "\n",
    "class Train:\n",
    "    score = namedtuple(\"score\", ['epoch', 'no_of_features','hidden_layers','test_loss', 'test_score'])\n",
    "    model_detail = namedtuple(\"model_detail\", ['epoch', 'no_of_features','hidden_layers', 'model'])\n",
    "    scores = []\n",
    "    models = []\n",
    "    def execute(x_train, x_test, \n",
    "                y_train, y_test, \n",
    "                input_dim, no_of_features, hidden_layers,\n",
    "                epochs = 40, keep_prob = 0.4):\n",
    "        \n",
    "        print(\"Training for no_of_features: {}, hidden_layer: {}\".format(no_of_features, hidden_layers\n",
    "                                                                        ))\n",
    "        model = Sequential()\n",
    "        model.add(Dense(no_of_features, input_dim=input_dim, activation='relu'))\n",
    "        model.add(Dropout(keep_prob))\n",
    "        model.add(BatchNormalization())\n",
    "        \n",
    "        for i in range(hidden_layers - 1):\n",
    "            model.add(Dense(no_of_features, activation='relu'))\n",
    "            model.add(Dropout(keep_prob))\n",
    "            model.add(BatchNormalization())\n",
    "\n",
    "        \n",
    "        model.add(Dense(1, activation='sigmoid'))\n",
    "\n",
    "        model.compile(loss='binary_crossentropy',\n",
    "                      optimizer=\"Adam\",\n",
    "                      metrics=['accuracy'])\n",
    "\n",
    "        x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=.6)\n",
    "        \n",
    "        model.fit(x_train, y_train,\n",
    "                  validation_data=(x_valid, y_valid),\n",
    "                  epochs=epochs,\n",
    "                  batch_size=128,\n",
    "                  verbose = 0)\n",
    "        \n",
    "        curr_score = model.evaluate(x_test, y_test) #, batch_size=128)\n",
    "        print(\"\\n Loss: {}, Accuracy: {}\".format(curr_score[0], curr_score[1])  )\n",
    "        Train.scores.append(Train.score(epochs,no_of_features,hidden_layers,curr_score[0], curr_score[1]))\n",
    "        Train.models.append(Train.model_detail(epochs,no_of_features,hidden_layers,model))\n",
    "                "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training for no_of_features: 4, hidden_layer: 2\n",
      "10816/11850 [==========================>...] - ETA: 0s\n",
      " Loss: 1.1975026383480443, Accuracy: 0.4639662447458581\n",
      "Training for no_of_features: 4, hidden_layer: 4\n",
      "11840/11850 [============================>.] - ETA: 0s\n",
      " Loss: 0.8347828163979929, Accuracy: 0.37940928270042196\n",
      "Training for no_of_features: 4, hidden_layer: 6\n"
     ]
    }
   ],
   "source": [
    "features_arr = [4, 8, 16, 32, 64, 128, 256, 1024]\n",
    "hidden_layers_arr = [2, 4, 6, 50, 100]\n",
    "\n",
    "\n",
    "for f, h in product(features_arr, hidden_layers_arr):\n",
    "    Train.execute(preprocessing.x_train, preprocessing.x_test, \n",
    "                  preprocessing.y_train, preprocessing.y_test, \n",
    "                 118, f, h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd.DataFrame(Train.scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for m in Train.models:\n",
    "    m.model.save(\"dataset/keras_model_epoch_{}_no_of_features_{}_hidden_layers_{}\".format(m.epoch,\n",
    "                                                                                         m.no_of_features,\n",
    "                                                                                         m.hidden_layers))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "_draft": {
   "nbviewer_url": "https://gist.github.com/9b2f6b44497639602df5aa2351dec6c3"
  },
  "anaconda-cloud": {},
  "gist": {
   "data": {
    "description": "Accuracies for 50 epochs",
    "public": false
   },
   "id": "9b2f6b44497639602df5aa2351dec6c3"
  },
  "kernelspec": {
   "display_name": "Python [conda env:p3]",
   "language": "python",
   "name": "conda-env-p3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
